************************************************
***Title: pm2pt5_clean.do
***Creators: Joelle Abramowitz, Shooshan Danagoulian, and Owen Fleming*
***Notes: This file cleans the pm2.5 data and constructs a county-date panel.

*For questions, contact
*Owen Fleming
*hg3490@wayne.edu
************************************************


**********SETUP
*Requires the global macro "path" (the base directory) to be defined before this file runs.
*Stop early with a clear message if it is empty; otherwise the cd below would resolve to /data/pm2pt5.
if "${path}" == "" {
	display as error "global macro path is not set; define it before running pm2pt5_clean.do"
	exit 198
}
*Quote the directory so a base path containing spaces is passed to cd as a single argument.
cd "${path}/data/pm2pt5"
clear all
set more off


**********IMPORT
*Read each yearly CSV (daily_88101_<year>.csv) and save it as pm2pt5_<year>.dta.
*When statecode is read in as a string (the original code handled this for 2006 only),
*convert it to numeric and drop the rows whose code is not a number. Testing the
*storage type rather than the year keeps statecode numeric in every saved file,
*so the later append cannot hit a string/numeric type mismatch.
forvalues yr = 2006/2023 {
	import delimited "daily_88101_`yr'.csv", clear

	capture confirm string variable statecode
	if _rc == 0 {
		*force turns non-numeric codes into missing; those rows are then removed
		destring statecode, force replace
		drop if missing(statecode)
	}

	save "pm2pt5_`yr'", replace
}


**********APPEND FILES
*Start from the 2006 file, then stack 2007 through 2023 beneath it in year order
use pm2pt5_2006, clear
forvalues yr = 2007/2023 {
	append using pm2pt5_`yr'
}


**********CLEAN
*county: drop AK (2), HI (15), PR (72) and the other non-state codes 78 and 80
*NOTE(review): 78 and 80 are presumably the Virgin Islands and Mexico -- confirm against the EPA code list
drop if inlist(statecode, 2, 15, 72, 78, 80)
*county identifier = state code * 1000 + county code, built without overwriting statecode
gen county = statecode * 1000 + countycode
drop statecode countycode

*date: datelocal is a string laid out as YYYY-MM-DD; convert it to a Stata daily date in one step
gen date = date(datelocal, "YMD")
format date %td
drop datelocal

*keeps and renames
keep county date arithmeticmean stmaxvalue
rename (arithmeticmean stmaxvalue) (pm2pt5 pm2pt5_max)


**********COLLAPSE
*collapse to county-date level
*pm2pt5: unweighted mean of the (renamed) arithmeticmean variable over all rows sharing a county and date
*pm2pt5_max: largest (renamed) stmaxvalue over those same rows, i.e. the maximum pm2.5 measure for that county-day
*NOTE(review): every row is weighted equally; if the raw files hold more than one row per monitor-day, confirm that is intended
collapse (mean) pm2pt5 (max) pm2pt5_max, by(county date)


**********SAVE
*Write the county-date panel to pm2pt5.dta in the current working directory
save pm2pt5, replace


**********REVERT DIRECTORY
*Quoted so a base path containing spaces is passed to cd as a single argument
cd "${path}"













